The goal of this notebook is to compare label transfer results between:

  • Label transfer code with Azimuth currently in main at commit 6af112d. These results are referred to as "azimuth".
  • Label transfer code adapted from Azimuth. These results are referred to as "adapted_azimuth".

Setup

knitr::opts_chunk$set(message = FALSE, warning = FALSE)
options(future.globals.maxSize = 891289600000000)

suppressPackageStartupMessages({
  library(tidyverse)
  library(patchwork)
  library(Seurat)
})

repository_base <- rprojroot::find_root(rprojroot::is_git_root)
module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06")  
result_dir <- file.path(module_base, "results")


# functions to perform label transfer with azimuth-adapted approach
source(
  file.path(module_base, "notebook_template", "utils", "label-transfer-functions.R")
)

# Output files
full_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-full.rds")
kidney_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-kidney.rds")

Functions

# Make a heatmap of counts for label transfer strategies
plot_count_heatmap <- function(df, title, sample_id) {
  all_preds <- union(df$azimuth, df$adapted_azimuth)
  
  plotme <- data.frame(
    azimuth = all_preds, 
    adapted_azimuth = all_preds
  ) |> 
    expand(azimuth, adapted_azimuth) |>
    mutate(n = NA_integer_) |>
    anti_join(distinct(df)) |>
    bind_rows(
      df |> count(azimuth, adapted_azimuth)
    ) |> 
    arrange(azimuth) |>
    mutate(
      color = case_when(
        is.na(n) ~ "white", 
        n <= 20 ~ "grey90",
        n <= 50 ~ "lightblue",
        n <= 100 ~ "cornflowerblue", 
        n <= 500 ~ "red",
        n <= 1000 ~ "yellow2",
        .default = "yellow"
      )
    )

    ggplot(plotme) + 
      aes(x = azimuth, y = adapted_azimuth, fill = color, label = n) + 
      geom_tile(alpha = 0.5) + 
      geom_abline(color = "firebrick", alpha = 0.5) +
      geom_text(size = 3.5) + 
      #scale_fill_viridis_c(name = "count", na.value = "grey90") +
      scale_fill_identity() +
      theme_bw() + 
      theme(axis.text.y = element_text(size = 7), 
            axis.text.x = element_text(angle = 30, size = 7, hjust=1), 
            legend.position = "bottom", 
            legend.title = element_text(size = 9), 
            legend.text = element_text(size = 8)) +
      labs(
        title = glue::glue("{sample_id}: {str_to_title(title)}")
      )
}


# Wrapper function to compare results between approaches
# Makes two plots:
# - heatmap comparing counts for cell labels between approaches
# - density plot of annotation scores for labels that agree and disagree between approaches
compare <- function(df, compare_column, score_column, title) {
  
  spread_df <- df |>
    select({{compare_column}}, barcode, version) |>
    pivot_wider(names_from = version, values_from = {{compare_column}})

  
  heatmap <- plot_count_heatmap(spread_df, title, unique(df$sample_id))  
  
  disagree_barcodes <- spread_df |>
    filter(azimuth != adapted_azimuth) |>
    pull(barcode)

  df2 <- df |>
    mutate(
      agree = ifelse(barcode %in% disagree_barcodes, "labels disagree", "labels agree"),
      agree = fct_relevel(agree, "labels disagree", "labels agree")
    ) 
  
  density_plot <- ggplot(df2) + 
    aes(x = {{score_column}}, fill = agree) + 
    geom_density(alpha = 0.6) + 
    theme_bw() +
    ggtitle(
      glue::glue("Disagree count: {length(disagree_barcodes)} out of {nrow(spread_df)}")
    ) +
    theme(legend.position = "bottom")

  print(heatmap + density_plot + plot_layout(widths = c(2, 1)))

}

Label transfer

This section both:

  • Reads in existing Azimuth label transfer results
  • Performs label transfer with Azimuth-adapted approach

If results are already available, we read in the files rather than regenerating results.

# sample ids to process
sample_ids <- c("SCPCS000179", "SCPCS000184", "SCPCS000194", "SCPCS000205", "SCPCS000208")

# read in seurat input objects, as needed
if ((!file.exists(full_results_file)) || (!file.exists(kidney_results_file))) {
  srat_objects <- sample_ids |>
    purrr::map(
      \(id) {
        srat <- readRDS(
          file.path(result_dir, id, glue::glue("01-Seurat_{id}.Rds")
        ))
        DefaultAssay(srat) <- "RNA"
        
        return(srat)
    })
  names(srat_objects) <- sample_ids
}

Label transfer for fetal full

if (!file.exists(full_results_file)) {
  
  # read reference
  ref <- readRDS(file.path(
  module_base,
    "results",
    "references",
    "cao_formatted_ref.rds"
  ))
  full_reference <- ref$reference
  full_refdata <- ref$refdata
  full_dims <- ref$dims
  full_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )

  
  fetal_full <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        
        # Perform label transfer with new code
        set.seed(params$seed)
        query <- prepare_query(srat, rownames(full_reference), file.path(module_base, "scratch", "homologs.rds"))
        query <- transfer_labels(
          query,
          full_reference,
          full_dims,
          full_refdata
        )
        
        # Read in results from existing Azimuth label transfer code
        srat_02a <- readRDS(
          file.path(result_dir, id, glue::glue("02a-fetal_full_label-transfer_{id}.Rds"))
        )
        
        # create final data frame with all annotations
        query@meta.data[, full_annotation_columns] |> 
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id, 
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02a),
              version = "azimuth", 
              predicted.annotation.l1 = srat_02a$fetal_full_predicted.annotation.l1, 
              predicted.annotation.l1.score = srat_02a$fetal_full_predicted.annotation.l1.score,
              predicted.annotation.l2 = srat_02a$fetal_full_predicted.annotation.l2, 
              predicted.annotation.l2.score = srat_02a$fetal_full_predicted.annotation.l2.score,
              predicted.organ = srat_02a$fetal_full_predicted.organ, 
              predicted.organ.score = srat_02a$fetal_full_predicted.organ.score
            ) 
          )
      }
    )
  write_rds(fetal_full, full_results_file)
} else {
  fetal_full <- read_rds(full_results_file)
}

Label transfer for fetal kidney

if (!file.exists(kidney_results_file)) {
  
  
  # read reference
  ref <- readRDS(file.path(
    module_base,
    "results",
    "references",
    "stewart_formatted_ref.rds"
  ))
  
  # Pull out information from the reference object we need for label transfer
  kidney_reference <- ref$reference
  kidney_refdata <- ref$refdata
  kidney_dims <- ref$dims
  kidney_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )
  
  
  fetal_kidney <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        
        # Perform label transfer with new code
        set.seed(params$seed)
        query <- prepare_query(srat, rownames(kidney_reference), file.path(module_base, "scratch", "homologs.rds"))
        query <- transfer_labels(
          query,
          kidney_reference,
          kidney_dims,
          kidney_refdata
        )
        
        # Read in results from existing Azimuth label transfer code
        srat_02b <- readRDS(
          file.path(result_dir, id, glue::glue("02b-fetal_kidney_label-transfer_{id}.Rds"))
        )
        
        # create final data frame with all annotations
        query@meta.data[, kidney_annotation_columns] |> 
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id, 
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02b),
              version = "azimuth", 
              predicted.compartment = srat_02b$fetal_kidney_predicted.compartment, 
              predicted.compartment.score = srat_02b$fetal_kidney_predicted.compartment.score,
              predicted.cell_type = srat_02b$fetal_kidney_predicted.cell_type, 
              predicted.cell_type.score = srat_02b$fetal_kidney_predicted.cell_type.score
            ) 
          )
      }
    )

  write_rds(fetal_kidney, kidney_results_file)
} else {
  fetal_kidney <- read_rds(kidney_results_file)
}

Compare results

We expect: - The majority of annotations match between approaches, with heatmap counts primarily falling along the diagonal - Any annotations that disagree should have low scores

Fetal full reference

Note that results from the L2 reference are not plotted because they are not used in cell type annotation.

fetal_full |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.annotation.l1, predicted.annotation.l1.score, "l1")
      compare(dat, predicted.organ, predicted.organ.score, "organ")
    }
  )

Fetal kidney reference

fetal_kidney |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.compartment, predicted.compartment.score, "compartment")
      compare(dat, predicted.cell_type, predicted.cell_type.score, "cell_type")
    }
  )

Conclusions

The vast majority of the time, labels agree. Generally speaking, when labels do not agree, their annotation scores are much lower, which is as expected.

Additional notable differences are shown in tables below:

Fetal full reference:

  • The Azimuth-adapted approach occasionally calls kidney or kidney-related cells as intestine or intestine epithelial
  • Some other kidney-related differences are noted:
Sample Reference Count Azimuth Azimuth-adapted
SCPSC000179 L1 70 Metanephritic cells Intestinal epithelial cells
SCPSC000179 Organ 64 Kidney Intestine
SCPSC000179 Organ 20 Lung Kidney
SCPSC000194 L1 60 Stromal cells Mesangial cells
SCPSC000194 Organ 35 Kidney Intestine
SCPSC000194 Organ 36 Lung Kidney
SCPSC000205 Organ 56 Kidney Intestine
SCPSC000208 L1 101 Mesangial cells Metanephritic cells
SCPSC000208 L1 101 Intestinal epithelial cells Metanephritic cells
SCPSC000208 Organ 149 Kidney Intestine

Fetal kidney reference:

  • There are a small but notable number of cells flipped between mesenchyme and kidney cells, in particular for sample SCPSC000184. This is the main discrepancy.
  • Most of the cell type differences are not necessarily biologically meaningful for our purposes, as listed below. These are not noted in the table.
    • kidney cell vs podocyte
    • kidney epithelial cell vs kidney cell
    • mesenchymal cell vs mesenchymal stem cell
  • There are a decent number of times when stroma and fetal nephron are flipped, but this makes sense given that we expect many of the stroma may be tumor.
  • Disagreeing annotation scores when using the cell type reference were often higher, but many of the disagreements for this reference were not meaningful (e.g. kidney epithelial cell vs kidney cell).
Sample Reference Count Azimuth Azimuth-adapted
SCPSC000179 cell type 202 mesenchymal cell kidney epithelial cell
SCPSC000184 compartment 111 fetal nephron stroma
SCPSC000184 cell type 536 kidney epithelial cell mesenchymal cell
SCPSC000194 compartment 565 fetal nephron stroma
SCPSC000194 cell type 89 kidney epithelial cell mesenchymal cell
SCPSC000205 compartment 684 fetal nephron stroma
SCPSC000208 compartment 2111 fetal nephron stroma

Session Info

sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS 15.1

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/New_York
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices datasets  utils     methods   base     

other attached packages:
 [1] Seurat_5.1.0       SeuratObject_5.0.2 sp_2.1-4           patchwork_1.2.0    lubridate_1.9.3    forcats_1.0.0      stringr_1.5.1     
 [8] dplyr_1.1.4        purrr_1.0.2        readr_2.1.5        tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0   

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3     rstudioapi_0.16.0      jsonlite_1.8.8         magrittr_2.0.3         spatstat.utils_3.1-0   farver_2.1.2          
  [7] rmarkdown_2.28         vctrs_0.6.5            ROCR_1.0-11            spatstat.explore_3.3-2 htmltools_0.5.8.1      sass_0.4.9            
 [13] sctransform_0.4.1      parallelly_1.38.0      KernSmooth_2.23-24     bslib_0.8.0            htmlwidgets_1.6.4      ica_1.0-3             
 [19] plyr_1.8.9             plotly_4.10.4          zoo_1.8-12             cachem_1.1.0           igraph_2.0.3           mime_0.12             
 [25] lifecycle_1.0.4        pkgconfig_2.0.3        Matrix_1.7-0           R6_2.5.1               fastmap_1.2.0          fitdistrplus_1.2-1    
 [31] future_1.34.0          shiny_1.9.1            digest_0.6.37          colorspace_2.1-1       rprojroot_2.0.4        tensor_1.5            
 [37] RSpectra_0.16-2        irlba_2.3.5.1          labeling_0.4.3         progressr_0.14.0       fansi_1.0.6            spatstat.sparse_3.1-0 
 [43] timechange_0.3.0       httr_1.4.7             polyclip_1.10-7        abind_1.4-5            compiler_4.4.1         withr_3.0.1           
 [49] fastDummies_1.7.4      MASS_7.3-61            tools_4.4.1            lmtest_0.9-40          httpuv_1.6.15          future.apply_1.11.2   
 [55] goftest_1.2-3          glue_1.7.0             nlme_3.1-166           promises_1.3.0         grid_4.4.1             Rtsne_0.17            
 [61] cluster_2.1.6          reshape2_1.4.4         generics_0.1.3         gtable_0.3.5           spatstat.data_3.1-2    tzdb_0.4.0            
 [67] data.table_1.16.0      hms_1.1.3              utf8_1.2.4             spatstat.geom_3.3-2    RcppAnnoy_0.0.22       ggrepel_0.9.5         
 [73] RANN_2.6.2             pillar_1.9.0           spam_2.10-0            RcppHNSW_0.6.0         later_1.3.2            splines_4.4.1         
 [79] lattice_0.22-6         renv_1.0.7             survival_3.7-0         deldir_2.0-4           tidyselect_1.2.1       miniUI_0.1.1.1        
 [85] pbapply_1.7-2          knitr_1.48             gridExtra_2.3          scattermore_1.2        xfun_0.47              matrixStats_1.3.0     
 [91] stringi_1.8.4          lazyeval_0.2.2         yaml_2.3.10            evaluate_0.24.0        codetools_0.2-20       BiocManager_1.30.25   
 [97] cli_3.6.3              uwot_0.2.2             xtable_1.8-4           reticulate_1.38.0      munsell_0.5.1          jquerylib_0.1.4       
[103] Rcpp_1.0.13            globals_0.16.3         spatstat.random_3.3-1  png_0.1-8              spatstat.univar_3.0-0  parallel_4.4.1        
[109] dotCall64_1.1-1        listenv_0.9.1          viridisLite_0.4.2      scales_1.3.0           ggridges_0.5.6         leiden_0.4.3.1        
[115] rlang_1.1.4            cowplot_1.1.3         
---
title: "Compare label transfer results between Azimuth and Azimuth-adapted strategy"
author: Stephanie Spielman, Data Lab
output: 
  html_notebook: 
    toc: yes
    toc_float: yes
params:
  seed: 12345
---


The goal of this notebook is to compare label transfer results between:

- Label transfer code with Azimuth currently in `main` at commit `6af112d`. These results are referred to as `"azimuth"`.
- Label transfer code adapted from Azimuth. These results are referred to as `"adapted_azimuth"`.


## Setup

```{r setup}
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
options(future.globals.maxSize = 891289600000000)

suppressPackageStartupMessages({
  library(tidyverse)
  library(patchwork)
  library(Seurat)
})

repository_base <- rprojroot::find_root(rprojroot::is_git_root)
module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06")  
result_dir <- file.path(module_base, "results")


# functions to perform label transfer with azimuth-adapted approach
source(
  file.path(module_base, "notebook_template", "utils", "label-transfer-functions.R")
)

# Output files
full_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-full.rds")
kidney_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-kidney.rds")
```

## Functions

```{r functions}
# Make a heatmap of counts for label transfer strategies
plot_count_heatmap <- function(df, title, sample_id) {
  all_preds <- union(df$azimuth, df$adapted_azimuth)
  
  plotme <- data.frame(
    azimuth = all_preds, 
    adapted_azimuth = all_preds
  ) |> 
    expand(azimuth, adapted_azimuth) |>
    mutate(n = NA_integer_) |>
    anti_join(distinct(df)) |>
    bind_rows(
      df |> count(azimuth, adapted_azimuth)
    ) |> 
    arrange(azimuth) |>
    mutate(
      color = case_when(
        is.na(n) ~ "white", 
        n <= 20 ~ "grey90",
        n <= 50 ~ "lightblue",
        n <= 100 ~ "cornflowerblue", 
        n <= 500 ~ "red",
        n <= 1000 ~ "yellow2",
        .default = "yellow"
      )
    )

    ggplot(plotme) + 
      aes(x = azimuth, y = adapted_azimuth, fill = color, label = n) + 
      geom_tile(alpha = 0.5) + 
      geom_abline(color = "firebrick", alpha = 0.5) +
      geom_text(size = 3.5) + 
      #scale_fill_viridis_c(name = "count", na.value = "grey90") +
      scale_fill_identity() +
      theme_bw() + 
      theme(axis.text.y = element_text(size = 7), 
            axis.text.x = element_text(angle = 30, size = 7, hjust=1), 
            legend.position = "bottom", 
            legend.title = element_text(size = 9), 
            legend.text = element_text(size = 8)) +
      labs(
        title = glue::glue("{sample_id}: {str_to_title(title)}")
      )
}


# Wrapper function to compare results between approaches
# Makes two plots:
# - heatmap comparing counts for cell labels between approaches
# - density plot of annotation scores for labels that agree and disagree between approaches
compare <- function(df, compare_column, score_column, title) {
  
  spread_df <- df |>
    select({{compare_column}}, barcode, version) |>
    pivot_wider(names_from = version, values_from = {{compare_column}})

  
  heatmap <- plot_count_heatmap(spread_df, title, unique(df$sample_id))  
  
  disagree_barcodes <- spread_df |>
    filter(azimuth != adapted_azimuth) |>
    pull(barcode)

  df2 <- df |>
    mutate(
      agree = ifelse(barcode %in% disagree_barcodes, "labels disagree", "labels agree"),
      agree = fct_relevel(agree, "labels disagree", "labels agree")
    ) 
  
  density_plot <- ggplot(df2) + 
    aes(x = {{score_column}}, fill = agree) + 
    geom_density(alpha = 0.6) + 
    theme_bw() +
    ggtitle(
      glue::glue("Disagree count: {length(disagree_barcodes)} out of {nrow(spread_df)}")
    ) +
    theme(legend.position = "bottom")

  print(heatmap + density_plot + plot_layout(widths = c(2, 1)))

}
```


## Label transfer

This section both:

- Reads in existing Azimuth label transfer results
- Performs label transfer with Azimuth-adapted approach

If results are already available, we read in the files rather than regenerating results.

```{r}
# sample ids to process
sample_ids <- c("SCPCS000179", "SCPCS000184", "SCPCS000194", "SCPCS000205", "SCPCS000208")

# read in seurat input objects, as needed
if ((!file.exists(full_results_file)) || (!file.exists(kidney_results_file))) {
  srat_objects <- sample_ids |>
    purrr::map(
      \(id) {
        srat <- readRDS(
          file.path(result_dir, id, glue::glue("01-Seurat_{id}.Rds")
        ))
        DefaultAssay(srat) <- "RNA"
        
        return(srat)
    })
  names(srat_objects) <- sample_ids
}
```


### Label transfer for fetal full

```{r}
if (!file.exists(full_results_file)) {
  
  # read reference
  ref <- readRDS(file.path(
  module_base,
    "results",
    "references",
    "cao_formatted_ref.rds"
  ))
  full_reference <- ref$reference
  full_refdata <- ref$refdata
  full_dims <- ref$dims
  full_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )

  
  fetal_full <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        
        # Perform label transfer with new code
        set.seed(params$seed)
        query <- prepare_query(srat, rownames(full_reference), file.path(module_base, "scratch", "homologs.rds"))
        query <- transfer_labels(
          query,
          full_reference,
          full_dims,
          full_refdata
        )
        
        # Read in results from existing Azimuth label transfer code
        srat_02a <- readRDS(
          file.path(result_dir, id, glue::glue("02a-fetal_full_label-transfer_{id}.Rds"))
        )
        
        # create final data frame with all annotations
        query@meta.data[, full_annotation_columns] |> 
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id, 
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02a),
              version = "azimuth", 
              predicted.annotation.l1 = srat_02a$fetal_full_predicted.annotation.l1, 
              predicted.annotation.l1.score = srat_02a$fetal_full_predicted.annotation.l1.score,
              predicted.annotation.l2 = srat_02a$fetal_full_predicted.annotation.l2, 
              predicted.annotation.l2.score = srat_02a$fetal_full_predicted.annotation.l2.score,
              predicted.organ = srat_02a$fetal_full_predicted.organ, 
              predicted.organ.score = srat_02a$fetal_full_predicted.organ.score
            ) 
          )
      }
    )
  write_rds(fetal_full, full_results_file)
} else {
  fetal_full <- read_rds(full_results_file)
}
```


### Label transfer for fetal kidney


```{r}
if (!file.exists(kidney_results_file)) {
  
  
  # read reference
  ref <- readRDS(file.path(
    module_base,
    "results",
    "references",
    "stewart_formatted_ref.rds"
  ))
  
  # Pull out information from the reference object we need for label transfer
  kidney_reference <- ref$reference
  kidney_refdata <- ref$refdata
  kidney_dims <- ref$dims
  kidney_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )
  
  
  fetal_kidney <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        
        # Perform label transfer with new code
        set.seed(params$seed)
        query <- prepare_query(srat, rownames(kidney_reference), file.path(module_base, "scratch", "homologs.rds"))
        query <- transfer_labels(
          query,
          kidney_reference,
          kidney_dims,
          kidney_refdata
        )
        
        # Read in results from existing Azimuth label transfer code
        srat_02b <- readRDS(
          file.path(result_dir, id, glue::glue("02b-fetal_kidney_label-transfer_{id}.Rds"))
        )
        
        # create final data frame with all annotations
        query@meta.data[, kidney_annotation_columns] |> 
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id, 
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02b),
              version = "azimuth", 
              predicted.compartment = srat_02b$fetal_kidney_predicted.compartment, 
              predicted.compartment.score = srat_02b$fetal_kidney_predicted.compartment.score,
              predicted.cell_type = srat_02b$fetal_kidney_predicted.cell_type, 
              predicted.cell_type.score = srat_02b$fetal_kidney_predicted.cell_type.score
            ) 
          )
      }
    )

  write_rds(fetal_kidney, kidney_results_file)
} else {
  fetal_kidney <- read_rds(kidney_results_file)
}
```


## Compare results

We expect:
- The majority of annotations match between approaches, with heatmap counts primarily falling along the diagonal
- Any annotations that disagree should have low scores


### Fetal full reference

Note that results from the L2 reference are not plotted because they are not used in cell type annotation.


```{r fig.height=8, fig.width=14}
fetal_full |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.annotation.l1, predicted.annotation.l1.score, "l1")
      compare(dat, predicted.organ, predicted.organ.score, "organ")
    }
  )
```


### Fetal kidney reference

```{r fig.height=8, fig.width=14}
fetal_kidney |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.compartment, predicted.compartment.score, "compartment")
      compare(dat, predicted.cell_type, predicted.cell_type.score, "cell_type")
    }
  )
```



## Conclusions

The vast majority of the time, labels agree. 
Generally speaking, when labels do not agree, their annotation scores are much lower, which is as expected.
 
Additional notable differences are shown in tables below:
    
### Fetal full reference:

- The Azimuth-adapted approach occasionally calls kidney or kidney-related cells as intestine or intestine epithelial
- Some other kidney-related differences are noted:

| Sample | Reference | Count | Azimuth | Azimuth-adapted |
|--------|-----------|-------|---------|-----------------|
| SCPSC000179 | L1 | 70 | Metanephritic cells | Intestinal epithelial cells | 
| SCPSC000179 | Organ | 64 | Kidney | Intestine | 
| SCPSC000179 | Organ | 20 | Lung | Kidney | 
| SCPSC000194 | L1 | 60 | Stromal cells | Mesangial cells | 
| SCPSC000194 | Organ | 35 | Kidney | Intestine | 
| SCPSC000194 | Organ | 36 | Lung | Kidney | 
| SCPSC000205 | Organ | 56 | Kidney | Intestine |
| SCPSC000208 | L1 | 101 | Mesangial cells | Metanephritic cells |  
| SCPSC000208 | L1 | 101 | Intestinal epithelial cells | Metanephritic cells |  
| SCPSC000208 | Organ | 149 | Kidney | Intestine | 


### Fetal kidney reference:


- There are a small but notable number of cells flipped between mesenchyme and kidney cells, in particular for sample SCPSC000184.
This is the main discrepancy.
- Most of the cell type differences are not necessarily biologically meaningful for our purposes, as listed below. These are not noted in the table.
   - `kidney cell` vs `podocyte`
   - `kidney epithelial cell` vs `kidney cell` 
   - `mesenchymal cell` vs `mesenchymal stem cell` 
- There are a decent number of times when stroma and fetal nephron are flipped, but this makes sense given that we expect many of the stroma may be tumor.
- Disagreeing annotation scores when using the cell type reference were often higher, but many of the disagreements for this reference were not meaningful (e.g. `kidney epithelial cell` vs `kidney cell`).


| Sample | Reference | Count | Azimuth | Azimuth-adapted |
|--------|-----------|-------|---------|-----------------|
| SCPSC000179 | cell type | 202 | mesenchymal cell | kidney epithelial cell | 
| SCPSC000184 | compartment | 111 | fetal nephron |  stroma | 
| SCPSC000184 | cell type | 536 | kidney epithelial cell | mesenchymal cell | 
| SCPSC000194 | compartment | 565 | fetal nephron |  stroma | 
| SCPSC000194 | cell type  | 89 | kidney epithelial cell | mesenchymal cell | 
| SCPSC000205 | compartment  | 684 | fetal nephron |  stroma | 
| SCPSC000208 | compartment  | 2111 | fetal nephron |  stroma | 


## Session Info

```{r}
sessionInfo()
```
